library(dplyr)
library(lubridate)
library(plotly)
Reading and transforming the data of the 1500m finalists at the Paris 2024 Olympic Games. In addition, we keep only the data from the finals. In this way we ‘better’ simulate the race we are interested in. The performance of a runner in ‘non-finals’ does not have to match his performance in finals.
JJOO_finalist_data_t_12 <- read.csv("~/data/DSLAB/Proyectos/SPORTS_ANALYTICS/RACES/JJOO_finalist_data_t_12.csv")
sim1 = JJOO_finalist_data_t_12
sim1 = sim1 %>%
mutate(mark_s = minute(ms(sim1$mark)) + second(ms(sim1$mark))/60)
sim_f = sim1[sim1$race=="F",]
head(sim_f)
## X race competition date disciplineCode
## 3 3 F U.S. Olympic Team Trials 2024-06-24T00:00:00.000Z 1500
## 6 6 F The XXXIII Olympic Games 2024-08-06T00:00:00.000Z 1500
## 7 7 F Athletissima Lausanne 2024-08-22T00:00:00.000Z 1500
## 8 8 F Weltklasse Zürich 2024-09-05T00:00:00.000Z 1500
## 9 9 F Memorial van Damme 2024-09-13T00:00:00.000Z 1500
## 10 10 F USA Indoor Championships 2024-02-17T00:00:00.000Z 1500
## mark Name mark_s
## 3 3:30.59 Cole Hocker 3.509833
## 6 3:27.65 Cole Hocker 3.460833
## 7 3:29.85 Cole Hocker 3.497500
## 8 3:30.46 Cole Hocker 3.507667
## 9 3:30.94 Cole Hocker 3.515667
## 10 3:37.51 Cole Hocker 3.625167
The mean and sd of the times for the athletes involved are shown.
sim2 =sim_f %>%
group_by(Name) %>%
summarise(mean = mean(mark_s),sd=sqrt(var(mark_s)),min=min(mark_s))
sim2
## # A tibble: 12 × 4
## Name mean sd min
## <chr> <dbl> <dbl> <dbl>
## 1 "Brian Komen" 3.60 0.0718 3.48
## 2 "Cole Hocker" 3.53 0.0536 3.46
## 3 "Hobbs Kessler" 3.56 0.0535 3.49
## 4 "Jakob Ingebrigtsen" 3.50 0.0366 3.45
## 5 "Josh Kerr" 3.50 0.0263 3.46
## 6 "NORDAS Narve Gilje " 3.56 0.0434 3.49
## 7 "Neil Gourley" 3.55 0.0504 3.51
## 8 "Niels Laros" 3.52 0.0156 3.49
## 9 "Pietro Arese" 3.59 0.0582 3.51
## 10 "Stefan Nillessen" 3.60 0.0801 3.51
## 11 "Timothy Cheruiyot" 3.55 0.0619 3.48
## 12 "Yared Nuguse" 3.50 0.0241 3.46
And drawing its density with the observed data.
p=ggplot(sim_f,aes(mark_s,col=Name))+geom_density()
library(plotly)
ggplotly(p)
In the following section the running times for each runner are simulated assuming a Gamma distribution.
# parámetros de una gamma
alpha=sim2$mean^2/sim2$sd^2
beta=sim2$sd^2/sim2$mean
Create a function for the simulation.
simula_class = function(N=100,class=matrix(0,N,12),alpha,beta,p){
for (i in 1:N){
res1=c(0,12)
for (j in 1:12){
res1[j]=rnorm(1,sim2[j,]$mean,sim2[j,]$sd)
a=rgamma(1,alpha[j],scale=beta[j])
while(a<sim2$min[j]*p){
a=rgamma(1,alpha[j],scale=beta[j])
}
res1[j]=a
}
class[i,]=sort(res1,index.return=TRUE)$ix
}
return(class=class)
}
The previous function is used to simulate.
class=simula_class(N=10000,alpha=alpha,beta=beta,p=1)
head(class)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 11 1 3 5 12 4 8 2 7 10 6 9
## [2,] 4 5 8 12 2 6 3 7 9 10 1 11
## [3,] 4 5 12 8 7 6 9 1 3 11 2 10
## [4,] 4 5 11 12 7 2 8 6 3 1 10 9
## [5,] 4 5 8 12 6 2 7 10 3 11 9 1
## [6,] 1 12 5 8 4 2 7 11 3 6 10 9
For athletes, the probabilities of 1st, 2nd and 3rd are calculated:
N=dim(class)[1]
prob1=round(100*table(class[,1])/N,2)
filas=as.numeric(names(prob1))
prob_vic=c(0,12)
prob_vic[filas]=prob1
prob1=round(100*table(class[,2])/N,2)
filas=as.numeric(names(prob1))
prob_2=c(0,12)
prob_2[filas]=prob1
prob1=round(100*table(class[,3])/N,2)
filas=as.numeric(names(prob1))
prob_3=c(0,12)
prob_3[filas]=prob1
prob_pod=as.data.frame(cbind(prob_vic,prob_2,prob_3))
prob_pod=prob_pod %>%
rowwise() %>%
mutate(prob_pod = sum(prob_vic, prob_2,prob_3, na.rm = TRUE))
prob1=cbind(sim2,Prob_VIC=as.vector(prob_vic),Prob_POD=as.vector(prob_pod$prob_pod))
prob1=as.data.frame(prob1)
prob_final=prob1[order(prob1$Prob_VIC,decreasing=TRUE),]
prob_final
## Name mean sd min Prob_VIC Prob_POD
## 4 Jakob Ingebrigtsen 3.496407 0.03664868 3.445500 34.43 63.80
## 5 Josh Kerr 3.496417 0.02630888 3.463167 25.59 69.02
## 12 Yared Nuguse 3.500095 0.02410718 3.463333 21.53 66.36
## 2 Cole Hocker 3.530352 0.05361465 3.460833 10.12 27.54
## 11 Timothy Cheruiyot 3.547271 0.06186217 3.478500 2.97 14.42
## 8 Niels Laros 3.515208 0.01555538 3.492333 2.88 35.42
## 1 Brian Komen 3.597205 0.07177189 3.480000 1.33 6.53
## 3 Hobbs Kessler 3.563937 0.05354333 3.490833 0.69 7.06
## 6 NORDAS Narve Gilje 3.559750 0.04339876 3.494667 0.42 5.93
## 7 Neil Gourley 3.548033 0.05041376 3.510833 0.02 2.50
## 10 Stefan Nillessen 3.601019 0.08012015 3.512500 0.02 0.77
## 9 Pietro Arese 3.593444 0.05817663 3.512333 NA 0.65
It is possible to face questions such as: What is the probability that a runner, Kerr, will be ahead of another, “Ingebrigtsen”?
prob_Kerr_Ingebrigtsen=100*sum(which(class[1:N,]==5)<which(class[1:N,]==4))/N
prob_Kerr_Ingebrigtsen
## [1] 48.37
The exercise is repeated with \(p=1.01\) so that the slow runs of each runner are eliminated.
class=simula_class(N=10000,alpha=alpha,beta=beta,p=1.01)
head(class)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 4 5 12 2 8 9 6 11 7 1 3 10
## [2,] 4 2 5 12 8 11 6 7 1 9 3 10
## [3,] 4 12 11 5 8 2 10 7 3 6 1 9
## [4,] 12 5 2 8 7 4 10 3 6 9 1 11
## [5,] 12 5 4 2 3 8 7 11 6 9 10 1
## [6,] 12 5 4 8 6 1 11 2 7 10 3 9
Once again, the probabilities of 1st, 2nd and 3rd are calculated:
N=dim(class)[1]
prob1=round(100*table(class[,1])/N,2)
filas=as.numeric(names(prob1))
prob_vic=c(0,12)
prob_vic[filas]=prob1
prob1=round(100*table(class[,2])/N,2)
filas=as.numeric(names(prob1))
prob_2=c(0,12)
prob_2[filas]=prob1
prob1=round(100*table(class[,3])/N,2)
filas=as.numeric(names(prob1))
prob_3=c(0,12)
prob_3[filas]=prob1
prob_pod=as.data.frame(cbind(prob_vic,prob_2,prob_3))
prob_pod=prob_pod %>%
rowwise() %>%
mutate(prob_pod = sum(prob_vic, prob_2,prob_3, na.rm = TRUE))
prob1=cbind(sim2,Prob_VIC=as.vector(prob_vic),Prob_POD=as.vector(prob_pod$prob_pod))
prob1=as.data.frame(prob1)
prob_rapido=prob1[order(prob1$Prob_VIC,decreasing=TRUE),]
prob_rapido
## Name mean sd min Prob_VIC Prob_POD
## 4 Jakob Ingebrigtsen 3.496407 0.03664868 3.445500 44.35 74.19
## 5 Josh Kerr 3.496417 0.02630888 3.463167 23.90 78.77
## 12 Yared Nuguse 3.500095 0.02410718 3.463333 22.89 78.81
## 2 Cole Hocker 3.530352 0.05361465 3.460833 7.55 28.31
## 11 Timothy Cheruiyot 3.547271 0.06186217 3.478500 0.65 9.85
## 8 Niels Laros 3.515208 0.01555538 3.492333 0.38 21.84
## 1 Brian Komen 3.597205 0.07177189 3.480000 0.24 4.06
## 3 Hobbs Kessler 3.563937 0.05354333 3.490833 0.03 2.45
## 6 NORDAS Narve Gilje 3.559750 0.04339876 3.494667 0.01 1.63
## 7 Neil Gourley 3.548033 0.05041376 3.510833 NA 0.07
## 9 Pietro Arese 3.593444 0.05817663 3.512333 NA 0.02
## 10 Stefan Nillessen 3.601019 0.08012015 3.512500 NA 0.00
The results are plotted.
datos=merge(prob_final,prob_rapido,by="Name")
datos=datos[,-c(2,3,4,7,8,9)]
colnames(datos)=c("Name","Vic_final","Pod_final","Vic_rapida","Pod_rapida")
datos[is.na(datos)]=0
datos=datos[order(datos$Vic_final,decreasing=TRUE),]
rownames(datos)=seq(1:12)
datos$Name=as.factor(datos$Name)
datos$Name <- factor(datos$Name, levels = (datos$Name)[order(datos$Vic_final, decreasing = TRUE)])
fig <- plot_ly(datos, x = ~Name, y = ~Vic_final, type = 'bar', name = 'Nomal paced')
fig <- fig %>% add_trace(y = ~Vic_rapida, name = 'Fast paced')
fig <- fig %>% layout(yaxis = list(title = 'Probability'), barmode = 'group',xaxis=list(title = ' '))
fig <- fig %>% layout(title = list(text='Golden probabilities for the finalists',y=.975))
fig
The graph reflects the odds of winning the race for each rider in two different scenarios: when the race is fast and in a normal scenario.
datos$Name=as.factor(datos$Name)
datos$Name <- factor(datos$Name, levels = (datos$Name)[order(datos$Pod_final, decreasing = TRUE)])
fig <- plot_ly(datos, x = ~Name, y = ~Pod_final, type = 'bar', name = 'Nomal paced')
fig <- fig %>% add_trace(y = ~Pod_rapida, name = 'Fast paced')
fig <- fig %>% layout(yaxis = list(title = 'Probability'), barmode = 'group',xaxis=list(title = ' '))
fig <- fig %>% layout(title = list(text='Podium probabilities for the finalists',y=.975))
fig
The graph reflects the podium probabilities (finishing the race in the top three) for each rider in two different scenarios: when the race is fast and in a normal scenario.
Isaac